EG started this on 2016-04-03. Question: how does the noise affect the log-likelihood? Weibull model: theta is a scale parameter, gamma is a shape parameter (> 0). Gompertz model: beta (G) is a scale parameter, alpha (R) is a shape parameter (> 0).

Compute the difference between the log-likelihood functions of the Gompertz and Weibull p.d.f.s, and test whether L(Weibull, X) > L(Gompertz, X) for the given parameters. Weibull model: theta is a scale parameter, gamma is a shape parameter (> 0). Gompertz model: beta is a scale parameter, alpha is a shape parameter (> 0).

The noise is additive Gaussian, e ~ N(0, sigma^2), with known variance sigma^2. The standard deviation (sd) of the Gaussian noise is varied: the maximum sd would be 3*mean(inverse.gomp.CDF) and the minimum sd would be mean(inverse.gomp.CDF).

require(flexsurv)
## Loading required package: flexsurv
## Loading required package: survival
require(gplots)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
#theta=0.025
#gamma=0.001
#test G and R in nested for loops
beta= 0.034  
#G G= (0.1,0.25)
alpha=0.01 
#R  R= (0.001,0.1)


# population size
N=100;
## Introduce random Gompertz function

#rgompertz(alpha,beta,N) gives random Gompertz numbers from inverse CDF of Gompertz
#where alpha and beta are 2 parameters, N is number of population
#generate gompertz random numbers by using inverse CDF
#generate random number with a given distribution of Gompertz
#prediction
rgompertz = function(alpha,beta, N){
  x.uniform = runif(N)
  #inverse of Gompertz CDF
  inverse.gomp.CDF = function(alpha,beta,y) {  (1/beta)*log(1 - (beta/alpha)*log(1-y)  ) }
  x.gompertz = inverse.gomp.CDF(alpha,beta, x.uniform)
  return(x.gompertz)
  }


##Introduce random Weibull function
rweibull= function(theta,gamma,N)
  {
  x.uniform= runif(n)
  inverse.wei.CDF=function(theta,gamma,y) { theta*(-log(1-y))^(1/gamma)}
  x.weibull=inverse.wei.CDF(theta,gamma,x.uniform)
  return(x.weibull)
  }

#create a function that calculates noise of lifespan
calculate.noise = function(i){
  
  #lifespan
  gaussian<-rnorm(N, mean = 0, sd=i)
  #observation
  #X<- gompertz.random +gaussian to be used in simulation later.
  #noise
  noise=sd(gaussian)
  
  return(noise)
  }




#generate gompertz random numbers (lifespan) 
#prediction
gompertz.random<-rgompertz(alpha,beta,N)
average.lifespan=mean(gompertz.random)

#initiliaze arrays for the variable values from loops 
sderr<- list()
Delta_LL<-list() 
G<-list()
R<-list()
LWei<-list()
LGomp<-list()
MeanLF<-list()
sdLS<-list()
Delta_LL.flex<-list()
LWei.flex<-list()
LGomp.flex<-list()
G.flex.estimated<-list()
R.flex.estimated<-list()
LLG.par<-list()
LLR.par<-list()
v.flex.estimated<-list()
lambda.flex.estimated<-list()
## simulate for parameters beta,alpha and noise=i to search effect of noise on delta likl
## with change in parameters

for (beta in c(0.05,0.08, 0.1,0.15,0.17, 0.2, 0.25)){
  for (alpha in c(1E-3, 0.002, 0.005,0.008, 0.01,0.03, 0.05)){ #fix alpha or in other words R shape parameter
    #for (sd in seq(round.lifespan,3*round.lifespan,by=1)){
    for (i in c(0, 0.5, 1, 2,3,4, 5)){ 
      #for (i in c(0)){ 
      
      
      
      
      #generate gompertz random numbers (lifespan) 
      #prediction
      gompertz.random<-rgompertz(alpha,beta,N)
      average.lifespan=mean(gompertz.random)
      
      #store average.lifespan into MeanLF list
      MeanLF[[length(MeanLF)+1]]=average.lifespan
      
      #check the sd by using calculate.noise() function
      sd.gaussian=calculate.noise(i)
      
      #generate gaussion random numbers 
      gaussian<-rnorm(N, mean = 2*average.lifespan, sd=i)
      
      #standard deviation of gompertz.random
      sd.lifespan=sd(gompertz.random)
      
      #store sd of lifespan into SdLS list
      sdLS[[length(sdLS)+1]] =sd.lifespan
      
      #add gaussian random numbers to gompertz random numbers
      lifespan<- gompertz.random +gaussian
      
      
      #calculate the mortality rate 
      m = alpha * exp( beta * lifespan )
      log_m = log(alpha) +  beta * lifespan; 
      beta.lifespan=beta*lifespan
      
      #pdf(paste("plots/","alpha=",alpha,".","beta=",beta, ".batch.pdf", sep=''), width=5, height=5)
      #plot(log_m ,beta.lifespan)
      #dev.off()
      
      #Log likelihood function for the Weibull model
      weib.likl<-function(param,y){
        theta<-exp(param[1])  #take exponential to avoid NaNs when taking log(theta)
        gamma<-exp(param[2])  # avoid NaNs when taking log(gamma)
        delta=1; # delta is 1 for right censored data which is our case; lifespan>0
        y=lifespan[!is.na(lifespan)]
        
        logl<-sum(delta*(log(gamma) + gamma*log(theta) + (gamma-1)*log(y) -
                           (theta*y)^gamma )) -sum((1-delta)*(theta*y)^gamma)
        
        return(-logl)
        }
      # take log(param) since you take exponential above to avoid NaN values above
      weib=optim(log(c(0.03,0.01)),weib.likl,y=lifespan)
      weib$value
      LWei[[length(LWei)+1]] = weib$value
      
      
      #beta=0.05; alpha=0.02 
      # Log likelihood function of gompertz distiribution
      gomp.likl <- function (param,y){
        beta<-param[1]
        alpha<-param[2]
        delta=1
        y=lifespan[!is.na(lifespan)]
        logl<-sum(delta*(log(beta)+alpha*y+(-(beta/alpha)*(exp(alpha*y)-1)))) +
          sum((1-delta)*(-(beta/alpha)*(exp(alpha*y)-1)))
        return(-logl)
        }
      gomp<-optim(c(0.03,0.01),gomp.likl,y=lifespan)
      
      gomp$value
      
      #store loglikelihood values of gompertz optimized results into LGomp list
      LGomp[[length(LGomp)+1]] = gomp$value
      
      # store R and G estimation from optim of likl functions in Gompertz
      LLG.par[[length(LLG.par)+1]] =gomp$par[1]
      LLR.par[[length(LLR.par)+1]]=gomp$par[2]
      
      delta.likelihood.wei<- weib$value-gomp$value
      
      #calculate LL and noise change
      sderr[[length(sderr)+1]] = i       
      Delta_LL[[length(Delta_LL)+1]] = delta.likelihood.wei
      G[[length(G)+1]]=beta
      #switch to alpha.seq when for fixed beta
      R[[length(R)+1]]=alpha
      
      #todo use flexsurv to calculate the LL
      
      #flexsurv only works with positive variables.
      #fix gaussian std to 0
      
      gaussian.flex= rnorm(N, mean = 2*average.lifespan, sd=0)
      X.flex= gompertz.random +gaussian.flex
      
      
      fitGomp = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="gompertz")
      fitWei = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="weibull")
      
      
      LWei.flex[[length(LWei.flex)+1]]=fitWei$loglik
      
      LGomp.flex[[length(LGomp.flex)+1]]=fitGomp$loglik
      
      param.Gomp<-fitGomp$res; R.flex<-param.Gomp[1]; G.flex<-param.Gomp[2];
      
      R.flex.estimated[[length(R.flex.estimated)+1]]<-R.flex
      G.flex.estimated[[length(G.flex.estimated)+1]]<-G.flex
      
      param.Wei<-fitWei$res; v.flex<-param.Wei[1]; lambda.flex<-param.Wei[2];
      
      v.flex.estimated[[length(v.flex.estimated)+1]]<-v.flex; 
      lambda.flex.estimated[[length(lambda.flex.estimated)+1]]<-lambda.flex
      
      delta_flexsurv=fitWei$loglik-fitGomp$loglik 
      
      #fitWei$loglik
      
      Delta_LL.flex[[length(Delta_LL.flex)+1]]=delta_flexsurv
      
      }
    }
  }

Make a semi-log plot.

For the Gompertz distribution, plot the log of the mortality rate against lifespan to check whether the relationship is linear.

m = alpha * exp( beta * lifespan )
      log_m = log(alpha) +  beta * lifespan; 
      

#pdf(paste("plots/","alpha=",alpha,".","beta=",beta, ".batch.pdf", sep=''), width=5, height=5)
plot(log_m ,lifespan)

#dev.off()
#make a data frame of list variables 
results = data.frame(cbind(sderr), cbind(R),cbind(LLR.par),cbind(R.flex.estimated),cbind(G),cbind(LLG.par),cbind(G.flex.estimated),cbind(Delta_LL) , cbind(Delta_LL.flex),
                     cbind(LWei),cbind(LWei.flex), cbind(LGomp),cbind(LGomp.flex), cbind(MeanLF), cbind(sdLS))



# create a matrix from results data frame to store in Results.csv
results_mat<-as.matrix(results)
write.csv(results_mat,file="Results.csv")

#write.csv(results_mat,file="noise_zero.csv")

#unlist variables
dLL<-unlist(results$Delta_LL )
dLL.flex<-unlist(results$Delta_LL.flex)
LLGomp<-unlist(results$LGomp)
LLGomp.flex<- unlist(results$LGomp.flex)
LLWei<- unlist(results$LWei)
LLWei.flex<- unlist(results$LWei.flex)
simulated.G<-unlist(results$G)
estimated.G.flex<-unlist(results$G.flex.estimated)
simulated.R<-unlist(results$R)
estimated.R.flex<-unlist(results$R.flex.estimated)

estimatedLL.G<-unlist(results$LLG.par)
estimatedLL.R<-unlist(results$LLR.par)

Find the linear regression summaries.

summary( lm( dLL~ dLL.flex))
## 
## Call:
## lm(formula = dLL ~ dLL.flex)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1206 -0.5270  0.4211  0.8295  6.3646 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.96322    0.11519  -8.362  1.6e-15 ***
## dLL.flex    -0.87082    0.02897 -30.064  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.511 on 341 degrees of freedom
## Multiple R-squared:  0.7261, Adjusted R-squared:  0.7253 
## F-statistic: 903.8 on 1 and 341 DF,  p-value: < 2.2e-16
summary( lm( LLGomp ~ LLGomp.flex))
## 
## Call:
## lm(formula = LLGomp ~ LLGomp.flex)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.617  -9.160  -2.081   4.709  48.782 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 50.95039    4.67913   10.89   <2e-16 ***
## LLGomp.flex -0.87853    0.01357  -64.74   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.29 on 341 degrees of freedom
## Multiple R-squared:  0.9248, Adjusted R-squared:  0.9245 
## F-statistic:  4191 on 1 and 341 DF,  p-value: < 2.2e-16
summary( lm( LLWei ~ LLWei.flex))
## 
## Call:
## lm(formula = LLWei ~ LLWei.flex)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.321  -8.393  -1.413   4.739  53.022 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 47.87647    4.39432   10.89   <2e-16 ***
## LLWei.flex  -0.88483    0.01285  -68.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.64 on 341 degrees of freedom
## Multiple R-squared:  0.9329, Adjusted R-squared:  0.9327 
## F-statistic:  4743 on 1 and 341 DF,  p-value: < 2.2e-16
summary(lm(LLWei~LLGomp))
## 
## Call:
## lm(formula = LLWei ~ LLGomp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6808 -1.7530  0.6038  2.3202  4.9265 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.746237   1.233717  -3.037  0.00258 ** 
## LLGomp       1.000964   0.003489 286.928  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.887 on 341 degrees of freedom
## Multiple R-squared:  0.9959, Adjusted R-squared:  0.9959 
## F-statistic: 8.233e+04 on 1 and 341 DF,  p-value: < 2.2e-16
summary(lm(LLWei.flex~LLGomp.flex))
## 
## Call:
## lm(formula = LLWei.flex ~ LLGomp.flex)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9219 -2.1739 -0.8229  1.7590  8.9761 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.337985   1.075108   2.175   0.0303 *  
## LLGomp.flex 0.998624   0.003118 320.279   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.824 on 341 degrees of freedom
## Multiple R-squared:  0.9967, Adjusted R-squared:  0.9967 
## F-statistic: 1.026e+05 on 1 and 341 DF,  p-value: < 2.2e-16
summary(lm(simulated.G~estimated.G.flex))
## 
## Call:
## lm(formula = simulated.G ~ estimated.G.flex)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.09842 -0.05809  0.00196  0.05159  0.13519 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.148425   0.003988  37.215  < 2e-16 ***
## estimated.G.flex -27.929553   9.603754  -2.908  0.00387 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0648 on 341 degrees of freedom
## Multiple R-squared:  0.0242, Adjusted R-squared:  0.02134 
## F-statistic: 8.458 on 1 and 341 DF,  p-value: 0.003874
summary(lm(simulated.R~estimated.R.flex))
## 
## Call:
## lm(formula = simulated.R ~ estimated.R.flex)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.024433 -0.011639 -0.005756  0.010375  0.039543 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.003336   0.002149   1.553    0.121    
## estimated.R.flex 0.069033   0.011492   6.007 4.85e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01608 on 341 degrees of freedom
## Multiple R-squared:  0.09569,    Adjusted R-squared:  0.09304 
## F-statistic: 36.08 on 1 and 341 DF,  p-value: 4.846e-09
summary(lm(simulated.R~estimatedLL.R))
## 
## Call:
## lm(formula = simulated.R ~ estimatedLL.R)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.024678 -0.011004 -0.006313  0.010732  0.039144 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.004236   0.002252   1.881   0.0608 .  
## estimatedLL.R 0.072217   0.013731   5.260 2.55e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01626 on 341 degrees of freedom
## Multiple R-squared:  0.07504,    Adjusted R-squared:  0.07232 
## F-statistic: 27.66 on 1 and 341 DF,  p-value: 2.555e-07
summary(lm(simulated.G~estimatedLL.G))
## 
## Call:
## lm(formula = simulated.G ~ estimatedLL.G)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.100884 -0.060921  0.008902  0.057436  0.109104 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.140896   0.003905  36.081   <2e-16 ***
## estimatedLL.G 5.078914   4.299420   1.181    0.238    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06546 on 341 degrees of freedom
## Multiple R-squared:  0.004076,   Adjusted R-squared:  0.001155 
## F-statistic: 1.395 on 1 and 341 DF,  p-value: 0.2383
results.sub<-data.frame(cbind(sderr),cbind(R),cbind(G),cbind(Delta_LL))

R vs noise for fixed G

R.els = unlist( unique(results.sub$R))
colnum = length(R.els)

tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)

mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = R.els


for (k in c(0.05,0.08, 0.1,0.15,0.17, 0.2, 0.25)){
  data = results.sub[results.sub[,3]==k, 4]
  
  
  
  
  data<-unlist(data)
  
  heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
  
  #rownames(heat_mat, do.NULL = TRUE, prefix = "row")
  rownames(heat_mat) <- c("0","0.5","1","2","3","4","5")
  
  colnames(heat_mat) <- R.els
  library(gplots)
  hM <- format(round(heat_mat, 2))
  data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
  
  
  #paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="") 
  #jpeg(paste("plots/",k, ".fixed.G.jpg", sep=''))
  
  #paste(“myplot_”, i, “.jpeg”, sep=””)
  
  heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black",  margins=c(5,10),
            dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
            xlab     = "R parameters",
            ylab     = "noise", main = bquote(paste("R vs. sd dLL at" ~ G==.(k))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
  
  
  #dev.off()
  
  }
## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

R vs G for fixed noise

G.els = unlist( unique(results.sub$G))
colnum = length(G.els)

R.els=unlist(unique(results.sub$R))
rownum = length(R.els)

mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = R.els
colnames(mat) = G.els
for (n in c(0, 0.5, 1,2,3,4,5) ){
  data = results.sub[results.sub[,1]==n, 4]
  
  
  
  
  data<-unlist(data)
  
  heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
  
  #rownames(heat_mat, do.NULL = TRUE, prefix = "row")
  rownames(heat_mat) <- R.els
  
  colnames(heat_mat) <- G.els
  library(gplots)
  hM <- format(round(heat_mat, 2))
  data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
  
  
  #paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="") 
  #jpeg(paste("plots/",n, ".fixed_noise.jpg", sep=''))
  
  #paste(“myplot_”, i, “.jpeg”, sep=””)
  
  heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black",  margins=c(5,10),
            dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
            xlab     = "R parameters",
            ylab     = "G parameters", main = bquote(paste("R vs G of dLL at" ~ sd==.(n))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
  
  
  #dev.off()
  
  }
## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

G vs noise for fixed R

G.els = unlist( unique(results.sub$G))
colnum = length(G.els)

tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)

mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = G.els
for (j in c(1E-3, 0.002, 0.005,0.008, 0.01,0.03, 0.05) ){
  data = results.sub[results.sub[,2]==j, 4]
  
  
  
  
  data<-unlist(data)
  
  heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
  
  #rownames(heat_mat, do.NULL = TRUE, prefix = "row")
  rownames(heat_mat) <- c("0","0.5","1","2","3","4","5")
  
  colnames(heat_mat) <- G.els
  library(gplots)
  hM <- format(round(heat_mat, 2))
  data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
  
  
  #paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="") 
  #jpeg(paste("plots/",j, ".fixed_R.jpg", sep=''))
  
  #paste(“myplot_”, i, “.jpeg”, sep=””)
  
  heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black",  margins=c(5,10),
            dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
            xlab     = "G parameters",
            ylab     = "noise", main = bquote(paste("G vs. sd of dLL at" ~ R==.(j))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
  
  
  #dev.off()
  
  }
## NULL

## NULL

## NULL

## NULL

## NULL

## NULL

## NULL